{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Decision Tree for Spam \n",
    "### (make sure to have run featurize_spam.py)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "np.random.seed(1)\n",
    "\n",
    "import scipy.io\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.decomposition import PCA\n",
    "from decisiontree import *\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((3336, 4), (3336,), (5857, 4))"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "spam_path = \"datasets/spam-dataset/spam_data.mat\"\n",
    "data = scipy.io.loadmat(spam_path)\n",
    "x = data[\"training_data\"]\n",
    "x = StandardScaler().fit_transform(x)\n",
    "pca = PCA(n_components=0.1)\n",
    "pca_fit = pca.fit(x)\n",
    "x = pca_fit.transform(x)\n",
    "y = np.squeeze(data[\"training_labels\"])\n",
    "x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2)\n",
    "x_test = pca_fit.transform(data[\"test_data\"])\n",
    "x.shape,y.shape,x_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 26.1 s, sys: 121 ms, total: 26.3 s\n",
      "Wall time: 25.7 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "dt = DecisionTree(max_depth=5,header=\"spam_decisiontree\")\n",
    "dt.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training accuracy: 0.8024737631184408\n",
      "validation accuracy: 0.7679640718562875\n",
      "saved predictions\n",
      "CPU times: user 83 ms, sys: 2.52 ms, total: 85.5 ms\n",
      "Wall time: 85.4 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "train_preds = dt.predict(x_train,\"train\")\n",
    "print(\"training accuracy:\",np.mean(train_preds==y_train))\n",
    "val_preds = dt.predict(x_val,\"val\")\n",
    "print(\"validation accuracy:\",np.mean(val_preds==y_val))\n",
    "dt.predict(x_test,\"test\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 0.74230434  0.36332807 -0.26175289 -0.68227287]\n",
      "[ 0.29276416 -1.3366646   2.19376482  1.44083043]\n",
      "[[ 0.0580845   0.03290297  0.11068428  0.10132154  0.14326055  0.10786764\n",
      "   0.05709394  0.07685877  0.10762476  0.13951499]\n",
      " [-0.01093026  0.02112622  0.0370875   0.02724811  0.06467807  0.04989163\n",
      "   0.01995069  0.02913266  0.02971837  0.08542487]\n",
      " [-0.04849529 -0.12604642 -0.11396102 -0.09335385 -0.15598889 -0.09255568\n",
      "  -0.12256773 -0.09831799 -0.08049307 -0.05109376]\n",
      " [-0.02349031 -0.1201852  -0.01062051 -0.05757403 -0.0224421   0.02003524\n",
      "   0.0599332   0.00714026  0.01137459  0.0066579 ]]\n"
     ]
    }
   ],
   "source": [
    "spam_ex, ham_ex = 0, 3330\n",
    "print(x[spam_ex,:])\n",
    "print(x[ham_ex,:])      \n",
    "#compare these values to csv file to find original feature names\n",
    "print(pca.components_[:,:10])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Depth 0: feature # 1 > -0.7780129071706656\n",
      "\n",
      "\n",
      "Depth 1: feature # 0 > 2.6189832383004514\n",
      "Depth 1: feature # 2 > 0.9921342449684696\n",
      "\n",
      "\n",
      "Depth 2: feature # 2 > 3.949073938824735\n",
      "Depth 2: feature # 0 > -3.161120498538386\n",
      "Depth 2: 0\n",
      "Depth 2: feature # 0 > -0.7690328621550679\n",
      "\n",
      "\n",
      "Depth 3: 1\n",
      "Depth 3: feature # 3 > -2.9930528367786953\n",
      "Depth 3: feature # 2 > 0.9073762092553337\n",
      "Depth 3: 0\n",
      "Depth 3: feature # 2 > -0.05866680006281934\n",
      "Depth 3: feature # 1 > -1.9683429838272546\n",
      "\n",
      "\n",
      "Depth 4: feature # 1 > 3.9852817301579764\n",
      "Depth 4: 1\n",
      "Depth 4: feature # 3 > -1.6476489326418253\n",
      "Depth 4: feature # 0 > -1.0477664390728805\n",
      "Depth 4: 0\n",
      "Depth 4: 1\n",
      "Depth 4: feature # 1 > -1.9539107909544002\n",
      "Depth 4: 1\n",
      "\n",
      "\n",
      "Depth 5: 0\n",
      "Depth 5: 0\n",
      "Depth 5: 0\n",
      "Depth 5: 1\n",
      "Depth 5: 0\n",
      "Depth 5: 1\n",
      "Depth 5: 1\n",
      "Depth 5: 0\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "dt.print_tree()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "finished predicting with depth 0\n",
      "finished predicting with depth 5\n",
      "finished predicting with depth 10\n",
      "finished predicting with depth 15\n",
      "finished predicting with depth 20\n",
      "finished predicting with depth 25\n",
      "finished predicting with depth 30\n",
      "finished predicting with depth 35\n",
      "CPU times: user 19min 29s, sys: 6.02 s, total: 19min 35s\n",
      "Wall time: 19min 51s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "val_accs = []\n",
    "for i in range(40):\n",
    "    dt = DecisionTree(max_depth=i,header=\"spam_decisiontree\")\n",
    "    dt.fit(x_train, y_train)\n",
    "    val_preds = dt.predict(x_val,\"val\")\n",
    "    val_accs.append(np.mean(val_preds==y_val))\n",
    "    if i % 5 == 0:\n",
    "        print(\"finished predicting with depth\",i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEKCAYAAADjDHn2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3XmYXHWd7/H3J02SzkI2OmxZSIIhbCJLxFGQC4ws4gKOjuI4z1VHZa4jjOJscGceF7x31mdER7njgGZ0HBG3ETNOLgFBcAEkCSKQhIaQBOgbOgnQna076e17/zi/Tk461V2V5XRVuj6v56mnzjl1TtW3DqS+fX6/8/v+FBGYmZkNZVS1AzAzs9rnZGFmZmU5WZiZWVlOFmZmVpaThZmZleVkYWZmZTlZmJlZWU4WZmZWVqHJQtLlkpolrZF0Q4nXZ0v6qaRfS3pc0hW5125MxzVLuqzIOM3MbGgqagS3pAbgaeASoAVYBrw3Ilbl9rkV+HVE/LOkU4ElETEnLX8bOBc4HvgJcFJE9A72eU1NTTFnzpxCvouZ2Ui1YsWKlyJiern9jigwhnOBNRGxFkDSHcCVwKrcPgFMSsuTgQ1p+UrgjojYBayTtCa930ODfdicOXNYvnz5of0GZmYjnKTnKtmvyGaoGcALufWWtC3vM8DvS2oBlgDX7cexZmY2TIpMFiqxbWCb13uBr0fETOAK4JuSRlV4LJKukbRc0vLNmzcfdMBmZlZakcmiBZiVW5/Jnmamfh8CvgsQEQ8BjUBThccSEbdGxMKIWDh9etkmNzMzO0BFJotlwHxJcyWNAa4GFg/Y53ngtwEknUKWLDan/a6WNFbSXGA+8EiBsZqZ2RAK6+COiB5J1wJLgQZgUUSslHQTsDwiFgN/Atwm6XqyZqYPRHZ71kpJ3yXrDO8BPjbUnVBmZlaswm6dHW4LFy4M3w1lZrZ/JK2IiIXl9vMIbjMzK8vJYgTbuHUni36xjlUbtjJSriDNrDqKHJRnVfY3S1Zz52PZTWQzpozj4pOP5uJTjub1846icXRDlaMzs8OJk8UI1bplJz9+/EXevXAm55wwlZ+s3sT3V7TwzYefY/yYBs5/VRNvOuUYzpg1mVEqNazFzA4XjUc0MPuo8YV+hpPFCPWNh9bTF8F1F89n1rTxvOe1s9nZ3ctDa1/m3tUbuXf1Ju5etbHaYZrZIXDmrCnc+bHzCv0MJ4sRaMeuHr718HNcfvqxzJq256+NxtENXLTgaC5acDSfuzJY9eJW1r/UUcVIzexQmDJ+dOGf4WQxAv3g0Ra27uzhQ+fPG3QfSZx2/GROO37yMEZmZocr3w01wvT2BYt+sY4zZ03hnBOmVjscMxshnCxGmHtXb2T9yx18+I1zqx2KmY0gThYjzFd/sY4ZU8Zx+WnHVjsUMxtBnCxGkCdatvDIulf44HlzOKLB/2nN7NDxL8oI8rVfrGXi2CN492tnld/ZzGw/OFmMEC9u6eTHj7/Ie147i0mNxd9GZ2b1xclihPjGg8/RF8EH3jCn2qGY2QjkZDEC7NjVw+2/eo43n37cXoPwzMwOFSeLEeD7K7JBeH9wvm+XNbNiOFkc5nr7gkW/XMdZsz0Iz8yK42RxmLt39Uaee7mDDw9R2sPM7GA5WRzm+gfhXXbaMdUOxcxGMBcSrGE7dvXwln/6Oa1bdw66z87uPv7qLad4EJ6ZFcrJooY98PRm1r/cwe+eM5NpE8aU3KdxdAPve90JwxyZmdUbJ4satnRlK1PHj+ZvfufVvnIws6ryL1CN6urp477Vm7jk1GOcKMys6vwrVKMefPYltu3q4TJXjzWzGuBkUaOWrtzIhDENnPeqpmqHYmbmZFGLevuCe1a1cuHJR9M4uqHa4ZiZOVnUokefb+Ol7V1ugjKzmuFkUYOWPtnKmIZRXLRgerVDMTMDnCxqTkRw18pWznvVURzpeSnMrEY4WdSYVS9upaWt001QZlZTCk0Wki6X1CxpjaQbSrx+s6TH0uNpSe2513pzry0uMs5asnTlRkYJ3nSqaz2ZWe0obAS3pAbgFuASoAVYJmlxRKzq3ycirs/tfx1wVu4tOiPizKLiq1V3r2xl4ZxpNE0cW+1QzMx2K/LK4lxgTUSsjYgu4A7gyiH2fy/w7QLjqXnrX9rBU63b3ARlZjWnyGQxA3ght96Stu1D0gnAXOC+3OZGScslPSzpquLCrB1LV7YCcKmboMysxhRZSFAltsUg+14NfD8ienPbZkfEBknzgPskPRERz+71AdI1wDUAs2fPPhQxV9XSla2cPmOS59E2s5pT5JVFCzArtz4T2DDIvlczoAkqIjak57XA/ezdn9G/z60RsTAiFk6ffniPSdi4dSePPt/OZae6CcrMak+RyWIZMF/SXEljyBLCPnc1SVoATAUeym2bKmlsWm4CzgNWDTx2JLl71UYALjvdycLMak9hzVAR0SPpWmAp0AAsioiVkm4ClkdEf+J4L3BHROSbqE4B/kVSH1lC+9v8XVQj0d0rW5nXNIH5R0+sdihmZvsodPKjiFgCLBmw7VMD1j9T4rgHgVcXGVst2dLRzUPPvsyH3zgPqVRXj5lZdXkEdw2496mN9PQFl53mu6DMrDY5WdSApStbOXZSI6+ZOaXaoZiZleRkUWWdXb088PRmLj3tGEaNchOUmdUmJ4sqe+Dpzezs7vOobTOraYV2cNvgtu3sZt1LO7hj2fNMHjeac+dOq3ZIZmaDcrIoUFdPHy+0dbB28w7WvbSddS/t4NnNO1j30g42b9u1e7/fe91sRjf4Is/MapeTxSGwcetOnt2cJYO1KRms3bydF9o66e3bM3xk2oQxzGuawIUnTWfu9AnMa5rIvOkTOHG6x1aYWW1zsjhI3132An/+g8d3rzeOHsWcoyZw2vGTeesZxzO3aUJKDBOYMn5MFSM1MztwThYH6eF1L3PUhDF88eqzmDd9AsdOavRdTWY24jhZHKTm1m2cNmMy589vqnYoZmaFca/qQejp7eOZTds5+dgjqx2KmVmhnCwOwvqXO+jq6WPBMU4WZjayOVkchObWbQAs8JWFmY1wThYHobl1K6MEr3JZcTMb4comC0mnD0cgh6OnWrcxp2kCjaMbqh2KmVmhKrmy+IqkRyT9kSSXRc1p3rjNndtmVhfKJouIOB94H9l82ssl3S7pksIjq3EdXT08/0oHC46ZVO1QzMwKV1GfRUQ8A/wV8BfAfwP+SdJTkn6nyOBq2dMbtxPhzm0zqw+V9FmcIelmYDVwMfC2iDglLd9ccHw1q7l1K4CbocysLlQygvvLwG3A/4yIzv6NEbFB0l8VFlmNe6p1G+NGNzB72vhqh2JmVrhKksUVQGdE9AJIGgU0RkRHRHyz0OhqWHPrNk46ZqLrQJlZXaikz+InwLjc+vi0ra41t25zf4WZ1Y1KkkVjRGzvX0nLdd328tL2Xby8o4sFx/pOKDOrD5Ukix2Szu5fkXQO0DnE/iNef5kPd26bWb2opM/iE8D3JG1I68cB7ykupNr3lGtCmVmdKZssImKZpJOBBYCApyKiu/DIalhz61aaJo6haeLYaodiZjYsKp38aAFwKtAInCWJiPi34sKqbe7cNrN6U8mgvE8DX0qPi4C/B95ecFw1q68veHrjdpf5MLO6UkkH97uA3wZaI+KDwGuAum1/ef6VDjq7e925bWZ1pZJk0RkRfUCPpEnAJmBesWHVrv7O7ZOcLMysjlSSLJan0uS3ASuAR4FHKnlzSZdLapa0RtINJV6/WdJj6fG0pPbca++X9Ex6vL/C71O45tZtSHDSMZ7wyMzqx5Ad3JIE/E1EtJPNa3EXMCkiHi/3xpIagFuAS4AWYJmkxRGxqn+fiLg+t/91wFlpeRrwaWAhEMCKdGzb/n7BQ61541ZmTxvP+DGV3htgZnb4G/LKIiICuDO3vr6SRJGcC6yJiLUR0QXcAVw5xP7vBb6dli8D7omIV1KCuAe4vMLPLdRTrdtYcIyboMysvlTSDPWwpNcewHvPAF7IrbekbfuQdAIwF7hvf48dTju7e1n/0g53bptZ3amkLeUi4A8lPQfsIBuYFxFxRpnjSpVjjUH2vRr4fn9l20qPlXQNcA3A7Nmzy4Rz8NZs2k5f4JpQZlZ3KkkWbz7A924hm4q130xgwyD7Xg18bMCxFw449v6BB0XErcCtAAsXLhwsER0yLvNhZvWqkmaoGORRzjJgvqS5ksaQJYTFA3eStACYCjyU27wUuFTSVElTgUvTtqpqbt3KmCNGMeeoui66a2Z1qJIri/8iSw4iK/cxF2gGThvqoIjokXQt2Y98A7AoIlZKuglYHhH9ieO9wB2pM73/2FckfY4s4QDcFBGv7Mf3KsRTrduYf/REjmioaOpyM7MRo5JCgq/Or6dy5X9YyZtHxBJgyYBtnxqw/plBjl0ELKrkc4ZLc+s2zp/fVO0wzMyG3X7/iRwRjwIHcnfUYa1tRxebtu3ynVBmVpfKXllI+mRudRRwNrC5sIhq1J7Obd8JZWb1p5I+i/yf0j1kfRg/KCac2tXcuhXw7HhmVp8q6bP47HAEUuuaN25jyvjRHH1k3RbcNbM6Vsl8FvekQoL961MlVf021uHWX+YjK5dlZlZfKungnp4KCQKQajUdXVxItScieLp1m5ugzKxuVZIseiXtrqWR6jgVPlq6lrS0dbKjq9ed22ZWtyrp4P5L4BeSHkjrF5DqMdWL5t13QnkOCzOrT5V0cN+VBuL9Ftko7usj4qXCI6shzRvT7HguTW5mdaqSDu53AN0R8eOI+E+y6VWvKj602vFU6zZmTBnHkY2jqx2KmVlVVNJn8emI2NK/kjq7P11cSLWnuXWrO7fNrK5VkixK7VM3c4p29fSxdvMOlyU3s7pWSbJYLunzkk6UNE/SzcCKogOrFc9u3k5PXzhZmFldqyRZXAd0Ad8BvgfsZO+Jika0F17pAGBu04QqR2JmVj2V3A21A7hhGGKpSe0d3QBMHT+mypGYmVVPJVVnpwN/TjbZUWP/9oi4uMC4akZ7ZxcAU8b7Tigzq1+VNEN9C3iKbIa8zwLr2TOD3YjX3tHNEaPExLF106dvZraPSpLFURHxNbKxFg9ExB+QDdCrC20d3UwZP9oFBM2srlXy53J3en5R0luADcDM4kKqLVs6u5g8zk1QZlbfKkkW/0vSZOBPgC8Bk4DrC42qhrR3dDPFndtmVucquRvqx2lxC3BRseHUnraObmZMaSy/o5nZCFZJn0Vd29LRxeRxvrIws/rmZFFGW0c3U33brJnVOSeLIezs7qWzu9djLMys7lUyKG8s8E5gTn7/iLipuLBqw9bO7Eawye7gNrM6V8ndUD8i69xeAewqNpza0ra71IevLMysvlWSLGZGxOWFR1KD2jtSqQ93cJtZnaukz+JBSa8uPJIa1J6aodxnYWb1rpIri/OBD0haR9YMJSAi4oxCI6sBu68snCzMrM5VkizeXHgUNaq/PLlHcJtZvSvbDBURzwFTgLelx5S0rSxJl0tqlrRGUsk5MSS9W9IqSSsl3Z7b3ivpsfRYXNnXObTaO7sZ3SAmjGmoxsebmdWMSm6d/TjwEeA/0qZ/l3RrRHypzHENwC3AJUALsEzS4ohYldtnPnAjcF5EtEk6OvcWnRFx5v59nUOrPY3edsVZM6t3lTRDfQh4XZoxD0l/BzxEVlRwKOcCayJibTruDuBKYFVun48At0REG0BEbNq/8IvVnsqTm5nVu0ruhhLQm1vvTdvKmQG8kFtvSdvyTgJOkvRLSQ9Lyt+i2yhpedp+VQWfd8i1dXR5jIWZGZVdWfwr8CtJP0zrVwFfq+C4UgklSnz+fOBCsjkyfi7p9IhoB2ZHxAZJ84D7JD0REc/u9QHSNcA1ALNnz64gpP3T3tHNzKnjD/n7mpkdbirp4P488EHgFaAN+GBEfKGC924BZuXWZ5JNnDRwnx9FRHdErAOayZIHEbEhPa8F7gfOKhHbrRGxMCIWTp8+vYKQ9s+WTjdDmZnBEMlC0qT0PI1s3u1/B74JPJe2lbMMmC9prqQxwNXAwLua7iTNkSGpiaxZaq2kqakmVf/289i7r2NYuBnKzCwzVDPU7cBbyWpC5ZuPlNbnDfXGEdEj6VpgKdAALIqIlZJuApZHxOL02qWSVpH1hfxZRLws6Q3Av0jqI0tof5u/i2o47OzuZWd3n8dYmJkxRLKIiLem57kH+uYRsQRYMmDbp3LLAXwyPfL7PAhUtcTIlv6Ks55/28ysfJ+FpHsr2TbStKVSH1N9ZWFmNviVhaRGYDzQJGkqe+5umgQcPwyxVdWeUh++sjAzG6rP4g+BT5AlhhXsSRZbyUZmj2hOFmZmewzVZ/FF4IuSritX2mMk2lNx1s1QZmZlB+VFxJcknQ6cCjTmtv9bkYFV2+65LNzBbWZWUSHBT5ONsD6V7M6mNwO/AEZ2sujoZkzDKMa74qyZWUW1od4F/DbQGhEfBF4DjC00qhrQ3tHF5PGjXXHWzIzKkkVnRPQBPWlU9ybKDMgbCdo7ut0EZWaWVFJIcLmkKcBtZHdFbQceKTSqGpCV+nDntpkZVNbB/Udp8SuS7gImRcTjxYZVfVs6u5k1zRVnzcxg6EF5Zw/1WkQ8WkxItaG9o5tXz3AzlJkZDH1l8Y/puRFYCPyGbGDeGcCvgPOLDa262jq6mDrBzVBmZjBEB3dEXBQRFwHPAWeneSPOIZtXYs1wBVgNO7t72dXT5yKCZmZJJXdDnRwRT/SvRMSTwJnFhVR9/aU+3MFtZpap5G6o1ZK+Sjb5UQC/D6wuNKoqa9td6sNXFmZmUFmy+CDwUeDjaf1nwD8XFlEN2F1E0M1QZmZAZbfO7gRuTo+6sKXTRQTNzPKGunX2uxHxbklPsPe0qgBExBmFRlZFbS5Pbma2l6GuLPqbnd46HIHUEs9lYWa2t6Hms3gxPT83fOHUhvaOLsYcMYpxo11x1swMhm6G2kaJ5ieygXkREZMKi6rK+osIuuKsmVlmqCuLI4czkFrS3ukigmZmeZXcOguApKPZe6a85wuJqAa0dXQz2f0VZma7lR3BLentkp4B1gEPAOuB/1twXFW1xXNZmJntpZJyH58Dfgt4OiLmks2a98tCo6oyN0OZme2tkmTRHREvA6MkjYqInzKCa0NFBG0d3b5t1swsp5I+i3ZJE8nKfHxL0iagp9iwqmdndx9dPX3uszAzy6nkyuJKoAO4HrgLeBZ4W5FBVVN7KvXhZigzsz0qubK4BvheRLQA3yg4nqpr2+EigmZmA1VyZTEJWCrp55I+JumYooOqpv4rCzdDmZntUTZZRMRnI+I04GPA8cADkn5SyZtLulxSs6Q1km4YZJ93S1olaaWk23Pb3y/pmfR4f4Xf56B54iMzs31VPCgP2AS0Ai8DR5fbWVIDcAtwCdACLJO0OCJW5faZD9wInBcRbWngH5KmAZ8mm/s7gBXp2Lb9iPeAuIigmdm+KhmU91FJ9wP3Ak3ARyosT34usCYi1kZEF3AHWWd53keAW/qTQERsStsvA+6JiFfSa/cAl1fyhQ6WO7jNzPZVyZXFCcAnIuKx/XzvGcALufUW4HUD9jkJQNIvgQbgMxFx1yDHztjPzz8g7R3djD1iFI2uOGtmtlslM+WV7GuoQKmSrQOr2B4BzAcuBGYCP5d0eoXHIukasru1mD179gGGubf2ji43QZmZDVDJ3VAHqgWYlVufCWwosc+PIqI7ItYBzWTJo5JjiYhbI2JhRCycPn36IQm6vaPbTVBmZgMUmSyWAfMlzZU0BrgaWDxgnzuBiwAkNZE1S60FlgKXSpoqaSpwadpWuPaObiZ7jIWZ2V72526o/RIRPZKuJfuRbwAWRcRKSTcByyNiMXuSwiqgF/izVIcKSZ8jSzgAN0XEK0XFmtfe2cXcpgnD8VFmZoeNwpIFQEQsAZYM2Pap3HIAn0yPgccuAhYVGV8pboYyM9tXkc1Qh52IyJqh3MFtZrYXJ4uczu5eunr7fGVhZjaAk0XO7tHb7uA2M9uLk0VOW0c2etvjLMzM9uZkkbNld10oN0OZmeU5WeS0uYigmVlJThY5/UUEp4zzlYWZWZ6TRY7Lk5uZleZkkdPe0UXjaFecNTMbyMkip72j201QZmYlOFnktHd2uwnKzKwEJ4scz2VhZlaak0WOiwiamZXmZJHjZigzs9KcLJKs4mwXk93BbWa2DyeLpKOrl+7eYKqvLMzM9uFkkbiIoJnZ4Jwskv7R226GMjPbl5NFsqUzSxZuhjIz25eTRbKnGcpXFmZmAzlZJC4iaGY2OCeLpL8ZarKnVDUz24eTRdK2o4txoxtccdbMrAQni6S9s9ud22Zmg3CySNo7upnszm0zs5KcLJL2ji6muL/CzKwkJ4ukvbObqROcLMzMSnGySFxE0MxscE4W9FecdXlyM7PBOFkAO7p66elzxVkzs8EUmiwkXS6pWdIaSTeUeP0DkjZLeiw9Ppx7rTe3fXGRcbbtSKU+3AxlZlbSEUW9saQG4BbgEqAFWCZpcUSsGrDrdyLi2hJv0RkRZxYVX17/6G03Q5mZlVbklcW5wJqIWBsRXcAdwJUFft4B21MXylcWZmalFJksZgAv5NZb0raB3inpcUnflzQrt71R0nJJD0u6qsA4PfGRmVkZRSYLldgWA9b/E5gTEWcAPwG+kXttdkQsBH4P+IKkE/f5AOmalFCWb968+YADbXczlJnZkIpMFi1A/kphJrAhv0NEvBwRu9LqbcA5udc2pOe1wP3AWQM/ICJujYiFEbFw+vTpBxzolnRl4YqzZmalFZkslgHzJc2VNAa4GtjrriZJx+VW3w6sTtunShqblpuA84CBHeOHTFtHN+PHNDD2CFecNTMrpbC7oSKiR9K1wFKgAVgUESsl3QQsj4jFwB9LejvQA7wCfCAdfgrwL5L6yBLa35a4i+qQae/oZqo7t83MBlVYsgCIiCXAkgHbPpVbvhG4scRxDwKvLjK2vKzUh5ugzMwG4xHcZB3c7tw2MxuckwXZlYWboczMBudkQf/ER76yMDMbTN0ni4jwlKpmZmXUfbLYvquH3r5wEUEzsyHUfbLo7QveesZxnHTskdUOxcysZhV66+zhYMr4MXz5986udhhmZjWt7q8szMysPCcLMzMry8nCzMzKcrIwM7OynCzMzKwsJwszMyvLycLMzMpysjAzs7IUMXBa7MOTpM3AcwfxFk3AS4conEPNsR0Yx3ZgHNuBOVxjOyEiys5LPWKSxcGStDwiFlY7jlIc24FxbAfGsR2YkR6bm6HMzKwsJwszMyvLyWKPW6sdwBAc24FxbAfGsR2YER2b+yzMzKwsX1mYmVlZdZ8sJF0uqVnSGkk3VDuePEnrJT0h6TFJy2sgnkWSNkl6MrdtmqR7JD2TnqfWSFyfkfT/0rl7TNIVwx1XimOWpJ9KWi1ppaSPp+21cN4Gi63q505So6RHJP0mxfbZtH2upF+l8/YdScM+xeUQsX1d0rrceTtzuGPLxdgg6deSfpzWD/68RUTdPoAG4FlgHjAG+A1warXjysW3Hmiqdhy5eC4AzgaezG37e+CGtHwD8Hc1EtdngD+tgXN2HHB2Wj4SeBo4tUbO22CxVf3cAQImpuXRwK+A3wK+C1ydtn8F+GgNxfZ14F3V/n8uxfVJ4Hbgx2n9oM9bvV9ZnAusiYi1EdEF3AFcWeWYalZE/Ax4ZcDmK4FvpOVvAFcNa1AMGldNiIgXI+LRtLwNWA3MoDbO22CxVV1ktqfV0ekRwMXA99P2ap23wWKrCZJmAm8BvprWxSE4b/WeLGYAL+TWW6iRfyxJAHdLWiHpmmoHM4hjIuJFyH58gKOrHE/etZIeT81Uw97MM5CkOcBZZH+J1tR5GxAb1MC5S00pjwGbgHvIWgHaI6In7VK1f68DY4uI/vP2v9N5u1nS2GrEBnwB+HOgL60fxSE4b/WeLFRiW838hQCcFxFnA28GPibpgmoHdBj5Z+BE4EzgReAfqxmMpInAD4BPRMTWasYyUInYauLcRURvRJwJzCRrBTil1G7DG1X60AGxSToduBE4GXgtMA34i+GOS9JbgU0RsSK/ucSu+33e6j1ZtACzcuszgQ1VimUfEbEhPW8Cfkj2D6bWbJR0HEB63lTleACIiI3pH3QfcBtVPHeSRpP9GH8rIv4jba6J81Yqtlo6dymeduB+sn6BKZKOSC9V/d9rLrbLU7NeRMQu4F+pznk7D3i7pPVkzeoXk11pHPR5q/dksQyYn+4UGANcDSyuckwASJog6cj+ZeBS4Mmhj6qKxcD70/L7gR9VMZbd+n+Ik3dQpXOX2ou/BqyOiM/nXqr6eRsstlo4d5KmS5qSlscBbyLrU/kp8K60W7XOW6nYnsolf5H1CQz7eYuIGyNiZkTMIfs9uy8i3sehOG/V7rWv9gO4guwukGeBv6x2PLm45pHdnfUbYGUtxAZ8m6xZopvsquxDZO2h9wLPpOdpNRLXN4EngMfJfpiPq9I5O5/skv9x4LH0uKJGzttgsVX93AFnAL9OMTwJfCptnwc8AqwBvgeMraHY7kvn7Ung30l3TFXrAVzInruhDvq8eQS3mZmVVe/NUGZmVgEnCzMzK8vJwszMynKyMDOzspwszMysLCcLO+ykqqh/eoDHPljm9SX999AfDElXSTr1YN+nws/6uqR3ld+z5LEXSnrDoXgvG9mcLKyuRMQbyrx+RWSjcg/WVWQVXPeRG0lbCy4EhjwnZuBkYYcJSX+pbN6RnwALcttPlHRXKrb4c0knp+3HSPphmnPgN/1/PUvanp6Pk/SzNO/Ak5LemLavl9SUlj+ZXntS0ifStjnK5n+4Lc1lcHcaxZuP9Q3A24F/SO9/oqT7Jf21pAeAj6dRwD+QtCw9zkvHTkjF+5al+Qj2qYKszJclrZL0X+SKEEo6R9ID6XwszY0qvl/SFyQ9mL7Pual44P8Ark9xvjG9zQVpv7W+yrDdqjnC0A8/KnkA55CNjB0PTCIbhfqn6bV7gflp+XVk5Q0AvkNWGA+yeUsmp+Xt6flPSKPi0+tHpuX1QFPuMycAE8lG0Z8FzAF6gDPT/t8Ffr9EzF8nN7cBWf2g/5PxyikvAAACjUlEQVRbvx04Py3PJiu5AfDX/e8HTCGrLjBhwHv/DlkV1gbgeKCdrJTDaOBBYHra7z3Aotzn35aWLyDN/cGAuStS3N8j+0PyVLIS/lX/f8CP6j9q6XLYbDBvBH4YER0Akhan54lkTSjfy8rxANBfFvpi4L9DViEU2DLgPZcBi1IhvTsj4rEBr5+fPnNH+qz/SHEsBtbl9l9BlkAq8Z3c8puAU3NxT0q1wC4lKwTX3yfTSEomuWMvAL6dvtcGSfel7QuA04F70vs2kJVB6fdtyOb/kDRpiL6ZOyMrIrhK0jEVfjcb4Zws7HBRqi7NKLI6/fs9fWX6wbyAbJKYb0r6h4j4t9wupco699uVW+4Fxg224wA7csujgNdHRGd+h1SE7p0R0VzmvUqdDwErI+L1FR4zWK2f/Pcb6jxYHXGfhR0Ofga8Q9K49Nf32wAim3thnaTfhd1t+a9Jx9wLfDRtb5A0Kf+Gkk4gq/t/G1nl1bNLfOZVksYrq/r7DuDn+xHzNrKpSgdzN3BtLp7+hLcUuC4lDSSdVeLYnwFXp+91HHBR2t4MTJf0+nTsaEmn5Y57T9p+PrAlIrZUEKcZ4GRhh4HIpv78DllV1B+w94/2+4APSeqvztvfIfxx4CJJT5A1FeV/NCG7C+gxSb8G3gl8scRnfp2sUuevgK9GxK/3I+w7gD9LndQnlnj9j4GFymZVW0XW0QzwObK+h8clPZnWB/ohWbXaJ8gmKnogxdxF1nfxd+l8PMbedzq1Kbt1+CtklXkB/pMsEec7uM324aqzZnVA0v1kHdnLqx2LHZ58ZWFmZmX5ysLMzMrylYWZmZXlZGFmZmU5WZiZWVlOFmZmVpaThZmZleVkYWZmZf1/CnG9hVcKgnwAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.xlabel(\"decision tree depth\")\n",
    "plt.ylabel('validation accuracy')\n",
    "plt.plot([i for i in range(len(val_accs))], val_accs);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7\n"
     ]
    }
   ],
   "source": [
    "print(np.argmax(val_accs))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
